/*
This do-file estimates treatment effects of PPP with csdid:
step 1 uses wage growth as the outcome, step 2 uses employment growth.
*/

do "${dodir}/make_globals.do"

*ssc install drdid, all replace
*ssc install csdid, all replace

////////////////////////////////////////////////////////////////////////////////
// step 1: wage growth regs
////////////////////////////////////////////////////////////////////////////////

// Load only the variables used in the wage-growth regressions.
// The file path is quoted so that a ${datadir} containing spaces still resolves.
use id q min_emp_2018 min_emp_2019 min_emp_2020q1 naics_2_mode st_mode ///
	med_wage total_compensation num_employees g using "${datadir}/reg_data", clear

// drop if 0 or 1 employees at any point in 2018, 2019, or 2020q1
drop if inlist(min_emp_2018, 0, 1) | inlist(min_emp_2019, 0, 1) | inlist(min_emp_2020q1, 0, 1)

// drop if two digit naics is missing (coded as . or 0)
drop if naics_2_mode==. | naics_2_mode==0

// drop if state is missing (coded as . or 0)
drop if st_mode==. | st_mode==0

// drop excluded sectors by two digit naics: 92, 22, 72, 49, 55
// (restaurants (special covid treatment), public admin, and utilities;
//  codes 49 and 55 are excluded as well)
keep if !inlist(naics_2_mode, 92, 22, 72, 49, 55)

// tabulate g with one row per firm (last non-missing g within id);
// the firm-quarter data are restored afterwards
preserve

	gcollapse (lastnm) g, by(id)

	tab g

restore

// Compensation cleanup: a missing compensation field is treated as zero
foreach v of varlist med_wage total_compensation {
	replace `v' = 0 if `v' == .
}

// med_wage becomes the larger of the two compensation measures
replace med_wage = max(med_wage, total_compensation)

// implied compensation per employee
generate mean_comp = med_wage / num_employees

// top-code: where compensation per employee exceeds 100k,
// reset med_wage to 100000 times the employee count
replace med_wage = 100000 * num_employees if mean_comp > 100000 & mean_comp != .


// size bin from min_emp_2018:
//   0: <10   1: 10-49   2: 50-99   3: 100-249   4: 250+
// Stata orders missing values above every number, so the open-ended top bin
// is guarded explicitly: a firm with missing min_emp_2018 gets a missing bin
// rather than being counted as 250+.
g firm_size_bin=0 if min_emp_2018<10
	replace firm_size_bin=1 if min_emp_2018>=10 & min_emp_2018<50
	replace firm_size_bin=2 if min_emp_2018>=50 & min_emp_2018<100
	replace firm_size_bin=3 if min_emp_2018>=100 & min_emp_2018<250
	replace firm_size_bin=4 if min_emp_2018>=250 & !missing(min_emp_2018)

// firm-level mean of num_employees over the 2019 quarters (q = 5..8)
generate emp_q2019 = num_employees if inrange(q, 5, 8)
egen mean_emp_2019 = mean(emp_q2019), by(id)
drop emp_q2019

// 2019 wages per employee: med_wage summed over q = 5..8,
// divided by the mean 2019 employee count
generate wage_q2019 = med_wage if inrange(q, 5, 8)
egen sum_wages_2019 = total(wage_q2019), by(id)
generate mean_wages_2019 = sum_wages_2019 / mean_emp_2019
drop wage_q2019 sum_wages_2019
	
	
// declare the firm-quarter panel so that lag operators can be used
tsset id q

// comp_2018: med_wage lagged by a whole number of years so that it points
// back into q 1-4 (the base year):
//   q 5-8 -> 4 lags, q 9-12 -> 8 lags, q 13-16 -> 12 lags, q > 16 -> 16 lags
// NOTE(review): for q > 20 a 16-quarter lag lands after q 4 - confirm the panel ends by q = 20
generate comp_2018 = L4.med_wage if inrange(q, 5, 8)
	replace comp_2018 = L8.med_wage if inrange(q, 9, 12)
	replace comp_2018 = L12.med_wage if inrange(q, 13, 16)
	replace comp_2018 = L16.med_wage if q > 16

// comp_2019: same construction pointing back into q 5-8
// NOTE(review): comp_2019 is not referenced again in the visible script
generate comp_2019 = L4.med_wage if inrange(q, 9, 12)
	replace comp_2019 = L8.med_wage if inrange(q, 13, 16)
	replace comp_2019 = L12.med_wage if q > 16

// outcome: med_wage relative to comp_2018, defined only after the base year (q > 4)
generate y = med_wage / comp_2018 if q > 4

// top-code the ratio at 10
replace y = 10 if y > 10 & !missing(y)

// remove every firm that has a missing outcome in any quarter after the base year
egen byte any_missing_y = max(y == . & q > 4), by(id)
drop if any_missing_y == 1
drop any_missing_y

// descriptive statistics by treatment quarter g, one row per firm;
// the firm-quarter data are restored afterwards
preserve

	// leave out g==0 firms whose mean 2019 employment is above 700
	// NOTE(review): a missing mean_emp_2019 also satisfies > 700 in Stata
	drop if g == 0 & mean_emp_2019 > 700

	gcollapse (lastnm) g mean_emp_2019 mean_wages_2019, by(id)

	// one table of means, then one table of medians
	foreach s in mean p50 {
		tabstat mean_emp_2019 mean_wages_2019, statistics(`s') by(g) format(%9.0fc)
	}

restore

// Cell means of the outcome by industry x state x size bin.
// The saved cell file also carries a running cell number stored under the name id.
preserve

	gcollapse (mean) y_m=y, by(naics_2_mode st_mode firm_size_bin)

	g id=_n

	save "${datadir}/y_m", replace

restore

// Attach only y_m to the firm-quarter data.
// A plain merge with keepusing(y_m) is used on purpose: because the cell file
// contains a variable named id, merging with "update replace" would overwrite
// the firm-level id in memory with the cell number, and the panel identifier
// passed to csdid below would no longer identify firms.
merge m:1 naics_2_mode st_mode firm_size_bin using "${datadir}/y_m", keepusing(y_m) nogen

// outcome in deviation from its cell mean
g y_dm=y-y_m


// drop if never treated (keep g > 0 and non-missing)
// NOTE(review): here the cell means are computed before this restriction,
// while step 2 applies the restriction before computing them - confirm intended
keep if g>0 & g~=.
	

// Run csdid once per firm-size bin (0-4) and once on the pooled sample (5).
// Each pass saves the simple ATT table, the event-study table merged with the
// ATT, and an event-study plot.
// NOTE(review): wboot requests bootstrap inference and no seed is set in this
// file - confirm a seed is set upstream if results must be reproducible.
forval xx= 0/5{

	// pass 5 is the pooled run: no firm-size restriction
	local sample
	if `xx' < 5 {
		local sample "if firm_size_bin==`xx'"
	}
	csdid y_dm `sample', i(id) t(q) g(g) method(dripw) wboot notyet

	// simple ATT: turn r(table) into a dataset, label its first six rows, save
	// NOTE(review): the labels assume the row order b / se / z / p / ll / ul -
	// confirm this is the layout of r(table) under wboot
	estat simple, estore(bin`xx'_att)
	preserve
	clear
	svmat double r(table), names(col)
	gen stat = ""
	local row 0
	foreach s in coeff sd z prob bot_95 top_95 {
		local ++row
		replace stat = "`s'" if _n == `row'
	}
	drop if _n > 6
	gen firm_size_bin = `xx'
	save "${outdir}/reg_tables/reg_att_growth_bin`xx'", replace
	restore

	// event study: same treatment of r(table), then attach the ATT column by stat
	estat event

	preserve
	clear
	svmat double r(table), names(col)
	gen firm_size_bin = `xx'
	gen stat = ""
	local row 0
	foreach s in coeff sd z prob bot_95 top_95 {
		local ++row
		replace stat = "`s'" if _n == `row'
	}
	drop if _n > 6
	merge 1:1 stat using "${outdir}/reg_tables/reg_att_growth_bin`xx'", nogen

	save "${outdir}/reg_tables/reg_table_growth_bin`xx'", replace

	restore

	// estat event is issued again immediately before plotting, since the
	// commands in between have replaced the stored results
	estat event

	csdid_plot, graphregion(color(white)) bgcolor(white) ///
	ylabel(,format(%9.2fc)) ///
	ytitle("Growth in Wages (Base Year: 2018)") ///
	xtitle("Quarters to Treatment") xscale(r(-8 4)) xlabel(-8(2)4,format(%9.0fc))
	// path quoted like the save calls above so an ${outdir} with spaces works
	graph export "${outdir}/comp_growth_bin`xx'.png", replace width(3000)
}

// output coefficients to excel: stack the six per-run tables
// (bins 0-4 and the pooled run 5) and export them to one sheet.
// Paths are quoted so that an ${outdir} containing spaces still resolves.
use "${outdir}/reg_tables/reg_table_growth_bin0", clear

forval xx= 1/5{
	append using "${outdir}/reg_tables/reg_table_growth_bin`xx'"
}

export excel "${outdir}/reg_tables/reg_table_growth.xlsx", replace sheet("appended") first(var)

////////////////////////////////////////////////////////////////////////////////
// step 2: emp growth regs
////////////////////////////////////////////////////////////////////////////////

// Load only the variables used in the employment-growth regressions.
// The file path is quoted so that a ${datadir} containing spaces still resolves.
use id q min_emp_2018 min_emp_2019 min_emp_2020q1 naics_2_mode st_mode ///
	med_wage total_compensation num_employees g using "${datadir}/reg_data", clear

// drop if 0 or 1 employees at any point in 2018, 2019, or 2020q1
drop if inlist(min_emp_2018, 0, 1) | inlist(min_emp_2019, 0, 1) | inlist(min_emp_2020q1, 0, 1)

// drop if two digit naics is missing (coded as . or 0)
drop if naics_2_mode==. | naics_2_mode==0

// drop if state is missing (coded as . or 0)
drop if st_mode==. | st_mode==0

// drop if never treated (keep g > 0 and non-missing)
keep if g>0 & g~=.

// drop excluded sectors by two digit naics: 92, 22, 72, 49, 55
// (restaurants (special covid treatment), public admin, and utilities;
//  codes 49 and 55 are excluded as well)
keep if !inlist(naics_2_mode, 92, 22, 72, 49, 55)

// Compensation cleanup: a missing compensation field is treated as zero
// NOTE(review): the step 2 outcome below is built from num_employees only;
// the cleaned med_wage and mean_comp do not appear to be used in this step
foreach v of varlist med_wage total_compensation {
	replace `v' = 0 if `v' == .
}

// med_wage becomes the larger of the two compensation measures
replace med_wage = max(med_wage, total_compensation)

// implied compensation per employee
generate mean_comp = med_wage / num_employees

// top-code: where compensation per employee exceeds 100k,
// reset med_wage to 100000 times the employee count
replace med_wage = 100000 * num_employees if mean_comp > 100000 & mean_comp != .


// size bin from min_emp_2018:
//   0: <10   1: 10-49   2: 50-99   3: 100-249   4: 250+
// Stata orders missing values above every number, so the open-ended top bin
// is guarded explicitly: a firm with missing min_emp_2018 gets a missing bin
// rather than being counted as 250+.
g firm_size_bin=0 if min_emp_2018<10
	replace firm_size_bin=1 if min_emp_2018>=10 & min_emp_2018<50
	replace firm_size_bin=2 if min_emp_2018>=50 & min_emp_2018<100
	replace firm_size_bin=3 if min_emp_2018>=100 & min_emp_2018<250
	replace firm_size_bin=4 if min_emp_2018>=250 & !missing(min_emp_2018)
	
// Base-period employment used to build the outcome:
// num_employees lagged by a whole number of years (not an average over years).

// declare the firm-quarter panel so that lag operators can be used
tsset id q

// emp_2018: num_employees lagged so that it points back into q 1-4 (the base year):
//   q 5-8 -> 4 lags, q 9-12 -> 8 lags, q 13-16 -> 12 lags, q > 16 -> 16 lags
// NOTE(review): for q > 20 a 16-quarter lag lands after q 4 - confirm the panel ends by q = 20
generate emp_2018 = L4.num_employees if inrange(q, 5, 8)
	replace emp_2018 = L8.num_employees if inrange(q, 9, 12)
	replace emp_2018 = L12.num_employees if inrange(q, 13, 16)
	replace emp_2018 = L16.num_employees if q > 16

// emp_2019: same construction pointing back into q 5-8
// NOTE(review): emp_2019 is not referenced again in the visible script
generate emp_2019 = L4.num_employees if inrange(q, 9, 12)
	replace emp_2019 = L8.num_employees if inrange(q, 13, 16)
	replace emp_2019 = L12.num_employees if q > 16

// outcome: num_employees relative to emp_2018, defined only after the base year (q > 4)
generate y = num_employees / emp_2018 if q > 4

// top-code the ratio at 10
replace y = 10 if y > 10 & !missing(y)

// remove every firm that has a missing outcome in any quarter after the base year
egen byte any_missing_y = max(y == . & q > 4), by(id)
drop if any_missing_y == 1
drop any_missing_y

// Cell means of the outcome by industry x state x size bin.
// The saved cell file also carries a running cell number stored under the name id.
preserve

	gcollapse (mean) y_m=y, by(naics_2_mode st_mode firm_size_bin)

	g id=_n

	save "${datadir}/y_m", replace

restore

// Attach only y_m to the firm-quarter data.
// A plain merge with keepusing(y_m) is used on purpose: because the cell file
// contains a variable named id, merging with "update replace" would overwrite
// the firm-level id in memory with the cell number, and the panel identifier
// passed to csdid below would no longer identify firms.
merge m:1 naics_2_mode st_mode firm_size_bin using "${datadir}/y_m", keepusing(y_m) nogen

// outcome in deviation from its cell mean
g y_dm=y-y_m

// Run csdid once per firm-size bin (0-4) and once on the pooled sample (5).
// Each pass saves the simple ATT table, the event-study table merged with the
// ATT, and an event-study plot.
// NOTE(review): wboot requests bootstrap inference and no seed is set in this
// file - confirm a seed is set upstream if results must be reproducible.
forval xx=0/5{

	// pass 5 is the pooled run: no firm-size restriction
	local sample
	if `xx' < 5 {
		local sample "if firm_size_bin==`xx'"
	}
	csdid y_dm `sample', i(id) t(q) g(g) method(dripw) wboot notyet

	// simple ATT: turn r(table) into a dataset, label its first six rows, save
	// NOTE(review): the labels assume the row order b / se / z / p / ll / ul -
	// confirm this is the layout of r(table) under wboot
	estat simple, estore(bin`xx'_att)
	preserve
	clear
	svmat double r(table), names(col)
	gen stat = ""
	local row 0
	foreach s in coeff sd z prob bot_95 top_95 {
		local ++row
		replace stat = "`s'" if _n == `row'
	}
	drop if _n > 6
	gen firm_size_bin = `xx'
	save "${outdir}/reg_tables/reg_emp_att_growth_bin`xx'", replace
	restore

	// event study: same treatment of r(table), then attach the ATT column by stat
	estat event

	preserve
	clear
	svmat double r(table), names(col)
	gen firm_size_bin = `xx'
	gen stat = ""
	local row 0
	foreach s in coeff sd z prob bot_95 top_95 {
		local ++row
		replace stat = "`s'" if _n == `row'
	}
	drop if _n > 6
	merge 1:1 stat using "${outdir}/reg_tables/reg_emp_att_growth_bin`xx'", nogen

	save "${outdir}/reg_tables/reg_emp_table_growth_bin`xx'", replace
	restore

	// estat event is issued again immediately before plotting, since the
	// commands in between have replaced the stored results
	estat event

	csdid_plot, graphregion(color(white)) bgcolor(white) ///
	ylabel(,format(%9.2fc)) ///
	ytitle("Growth in Employees (Base Year: 2018)") ///
	xtitle("Quarters to Treatment") xscale(r(-8 4)) xlabel(-8(2)4,format(%9.0fc))
	// path quoted like the save calls above so an ${outdir} with spaces works
	graph export "${outdir}/emp_growth_bin`xx'.png", replace width(3000)
}

// output coefficients to excel: stack the six per-run tables
// (bins 0-4 and the pooled run 5) and export them to one sheet.
// Paths are quoted so that an ${outdir} containing spaces still resolves.
use "${outdir}/reg_tables/reg_emp_table_growth_bin0", clear

forval xx= 1/5{
	append using "${outdir}/reg_tables/reg_emp_table_growth_bin`xx'"
}

export excel "${outdir}/reg_tables/reg_emp_table_growth.xlsx", replace sheet("appended") first(var)
